Load libraries and DE genes

library(ggplot2)
library(readxl)
library(ggrepel)
library(kableExtra)
library(tidyverse)
library(org.Hs.eg.db)
library(clusterProfiler)
library(enrichplot)
library(msigdbr)
library(ggplot2)
hs=get("org.Hs.eg.db")

MEDIAsave="/home/mclaudia/MEDIA Project/Codici_finali/"
MEDIAgraph="/home/mclaudia/MEDIA Project/Codici_finali/Paper_figures/"

load(paste0(MEDIAsave,"DE_DESeq2_SCell.RData"))
singleCell <- df.res_sc
singleCell$gene_name <-rownames(singleCell)

load(paste0(MEDIAsave,"genide-bulk-unpaired_DE.RData"))

unpaired <-OAvsnonOA

load(paste0(MEDIAsave,"results_DE_EGApaired.RData"))
paired <-df.res

load(paste0(MEDIAsave,"DEgenes_from_validation.RData"))
valid <-dff

rm(df.res,OAvsnonOA,df.res_sc)

up_genes <- read_excel(paste0(MEDIAsave,"up_dw_genes_consensus.xlsx"), sheet=1)
dw_genes <- read_excel(paste0(MEDIAsave,"up_dw_genes_consensus.xlsx"), sheet=2)

Pre-process dataset for the Enrichment Bubble Plot construction

riskGenes <-c("TNFSF11","LOXL3","DNER","TSPAN2","THBS3","ASPN","HTRA1","DYSF")

ddsc <-singleCell$gene_name[grep("\\.",singleCell$gene_name)]
ss=ddsc[-grep("^AP0.+|^RP.+-|^CT.-.+|^AC.+|^AL.|^GS1-.+|^XX.+|^LL.+",ddsc)]

singleCell <-singleCell[!singleCell$gene_name %in% c("CRYBG3.1", "SRSF10.1"),]
ss <-ss[!ss%in%c("CRYBG3.1", "SRSF10.1")]

singleCell[singleCell$gene_name %in% ss,"gene_name"] <- gsub("\\..*","",ss)
rm(ss,ddsc)

singleCell <-singleCell[,c(2,5,6)]
colnames(singleCell) <- c("log2FC","p_adj","gene_name")
singleCell$class <-"Dataset 1"

paired <-paired[,c(3,7,8)]
colnames(paired) <- c("log2FC","p_adj","gene_name")
paired$class <-"Dataset 2"

unpaired <-unpaired[,c(3,7,8)]
colnames(unpaired) <- c("log2FC","p_adj","gene_name")
unpaired$class <-"Dataset 3"

valid <-valid[,c(2,3,1)]
colnames(valid) <- c("log2FC","p_adj","gene_name")
valid$class <-"Dataset 4/Validation"

total_genes <-rbind(singleCell, paired, unpaired, valid)
rownames(total_genes) <-paste0(1:nrow(total_genes))

total_genes[1:10,]
##       log2FC        p_adj gene_name     class
## 1  14.356062 1.319705e-73    COL1A1 Dataset 1
## 2   1.605177 1.141659e-64    ABI3BP Dataset 1
## 3   2.863385 2.653574e-64     TGFBI Dataset 1
## 4   1.685722 8.423114e-60    TSPAN2 Dataset 1
## 5   2.232388 8.576118e-53    PDLIM7 Dataset 1
## 6  12.742026 3.912155e-49     CRLF1 Dataset 1
## 7   1.863951 7.741079e-48      MT1G Dataset 1
## 8   2.173891 1.781909e-47    CRTAC1 Dataset 1
## 9  -1.184766 1.076830e-42     KLF10 Dataset 1
## 10 11.641215 4.644320e-41     CLIC3 Dataset 1

Annotation

total_genes$col <-"gray50"
total_genes[total_genes$log2FC <= -1.5 & total_genes$class %in% c("Dataset 1","Dataset 3","Dataset 4/Validation"), "col"] <- "springgreen3"
total_genes[total_genes$log2FC <= -0.8 & total_genes$class=="Dataset 2", "col"] <- "springgreen3"


total_genes[total_genes$log2FC >= 1.5 & total_genes$class %in% c("Dataset 1","Dataset 3","Dataset 4/Validation"), "col"] <- "red"
total_genes[total_genes$log2FC >= 0.8 & total_genes$class=="Dataset 2", "col"] <- "red"

total_genes[total_genes$gene_name %in% c(up_genes$gene,dw_genes$gene), "col"] <- "blue"
total_genes[total_genes$p_adj > 0.05, "col"] <- "gray50"

total_genes$label <- NA
total_genes[total_genes$gene_name %in% riskGenes, "label"] <- total_genes[total_genes$gene_name %in% riskGenes,"gene_name"]


total_genes$Shape <- "unselected"
total_genes[total_genes$gene_name %in% riskGenes, "Shape"] <- "selected risk.sc."

total_genes$ss <-1
total_genes[total_genes$gene_name %in% riskGenes, "ss"] <- 3

Volcano Plots

ggplot(data=total_genes, aes(x=log2FC, y=-log10(p_adj), color=col,label=label, shape=Shape)) +
  geom_point(size=total_genes$ss) +
  geom_label_repel(size=5,
                   min.segment.length = 0,direction = "both",
                   max.overlaps =40,
                   color="black",
                   nudge_x=1,
                   segment.linetype=2) +
  scale_shape_manual(values=c(8, 20))+
  scale_color_manual(values = c("red","gray50","springgreen3","blue"),
                     breaks = c("red","gray50","springgreen3","blue"),
                     labels = c("UP","notDE","DOWN","consensus sig.")) +
  labs(color="Legend")+
  geom_hline(yintercept=-log10(0.05), col="gray", linetype=2)+
  theme_bw()+
  facet_wrap(.~class, scales = "free_y", ncol=2)+
  theme(strip.text = element_text(size = 18, face = "bold"),
      legend.title=element_text(size=16), 
      legend.text=element_text(size=14),
      axis.title=element_text(size=15))+
      xlab("log2(FC)")+
      ylab("-log10(FDR)")+
       guides(color = guide_legend(override.aes = list(size = 4)),
              shape = guide_legend(override.aes = list(size = 4)))

GSEA: GO BP and REACTOME analyses, select processes commonly enriched by all the datasets

genes1 <-as.numeric(total_genes[total_genes$class=="Dataset 1",1])
names(genes1) <-total_genes[total_genes$class=="Dataset 1",3]

genes2 <-as.numeric(total_genes[total_genes$class=="Dataset 2",1])
names(genes2) <-total_genes[total_genes$class=="Dataset 2",3]

genes3 <-as.numeric(total_genes[total_genes$class=="Dataset 3",1])
names(genes3) <-total_genes[total_genes$class=="Dataset 3",3]

list_genes <-list(genes1, genes2, genes3)

list_obj <-vector("list",3)
names(list_obj) <-c("gsebp1","gsebp2","gsebp3")

list_objR <-vector("list",3)
names(list_objR) <-c("ans1","ans2","ans3")


rc <-msigdbr(species = "Homo sapiens", category = "C2", subcategory = "CP:REACTOME")
sig <-rc[,c(3,4)]
sig$gs_name <-gsub("^REACTOME_","",sig$gs_name)

for(i in 1:length(list_genes)){
  x <-sort(list_genes[[i]], decreasing = TRUE)
  
  gsebp <- gseGO(geneList=x, 
                 ont ="BP", 
                 keyType = "SYMBOL", 
                 pvalueCutoff = 0.15,
                 verbose = TRUE, 
                 minGSSize = 5,
                 OrgDb = hs, 
                 pAdjustMethod = "fdr")
  list_obj[[i]] <-gsebp
  
  gsreac <- clusterProfiler::GSEA(x,TERM2GENE = sig,
                               minGSSize =5, pAdjustMethod = "fdr",
                               pvalueCutoff =0.15)
  list_objR[[i]] <-gsreac
}


rb1=list_obj[[1]]@result
rb1$class <-"Dataset 1"
rb2=list_obj[[2]]@result
rb2$class <-"Dataset 2"
rb3=list_obj[[3]]@result
rb3$class <-"Dataset 3"

rc1=list_objR[[1]]@result
rc1$class <-"Dataset 1"
rc2=list_objR[[2]]@result
rc2$class <-"Dataset 2"
rc3=list_objR[[3]]@result
rc3$class <-"Dataset 3"


table_allBP <-rbind(rb1,rb2,rb3)
table_allBP <-table_allBP[table_allBP$NES > 0,]

table_allRc <-rbind(rc1,rc2,rc3)
table_allRc <-table_allRc[table_allRc$NES > 0,]


sell <-names(table(table_allBP$ID)[table(table_allBP$ID)==3])
sellr <-names(table(table_allRc$ID)[table(table_allRc$ID)==3])


sel_pb <-table_allBP[table_allBP$ID %in% sell,]
sel_rc <-table_allRc[table_allRc$ID %in% sellr,]

sel_pb <-sel_pb[,c(2,5,7,12)]
sel_rc <-sel_rc[,c(1,5,7,12)]

Bubble plot of common enriched processes

ggplot(sel_pb, aes(y =class , x =Description)) + 
  ggtitle("GO: Biological Processes enriched in all the three experiments") +
  geom_point(data=sel_pb,aes(y =class , x =Description, size = NES, colour = -log(p.adjust)), alpha=.8)+
  scale_color_gradient(low="blue",high="red")+
  theme_bw()+ 
  theme(plot.title = element_text(face="bold",size=16),plot.margin = unit(c(2,1,1,2), "cm"),
        axis.text.x = element_text(angle = 60, vjust = 1, hjust=1, size=15, face="bold"),
        axis.text.y = element_text(size=15, face="bold"),
        legend.title=element_text(size=16), 
        legend.text=element_text(size=14))+
  labs(x="", y="", size="NES", color="-log10(FDR)")

ggplot(sel_rc, aes(y =class , x =ID)) + 
  ggtitle("REACTOME Pathways enriched in all the three experiments") +
  geom_point(data=sel_rc,aes(y =class , x =ID, size = NES, colour = -log(p.adjust)), alpha=.8)+
  scale_color_gradient(low="blue",high="red")+
  theme_bw()+ 
  theme(plot.title = element_text(face="bold", size=16),plot.margin = unit(c(5,1,1,5), "cm"), 
        axis.text.x = element_text(angle = 60, vjust = 1, hjust=1, size=11, face="bold"),
        axis.text.y = element_text(size=13, face="bold"),
        legend.title=element_text(size=16), 
        legend.text=element_text(size=14))+
  labs(x="", y="", size="NES", color="-log10(FDR)")

Session Info

## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.5 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] msigdbr_7.5.1         enrichplot_1.21.0     clusterProfiler_4.4.4
##  [4] org.Hs.eg.db_3.15.0   AnnotationDbi_1.58.0  IRanges_2.30.1       
##  [7] S4Vectors_0.34.0      Biobase_2.56.0        BiocGenerics_0.42.0  
## [10] lubridate_1.9.2       forcats_1.0.0         stringr_1.5.0        
## [13] dplyr_1.1.3           purrr_1.0.1           readr_2.1.4          
## [16] tidyr_1.3.0           tibble_3.2.1          tidyverse_2.0.0      
## [19] kableExtra_1.3.4      ggrepel_0.9.3         readxl_1.4.3         
## [22] ggplot2_3.4.3        
## 
## loaded via a namespace (and not attached):
##   [1] ggtree_3.6.2           fgsea_1.22.0           colorspace_2.1-0      
##   [4] qvalue_2.28.0          XVector_0.36.0         aplot_0.1.10          
##   [7] rstudioapi_0.15.0      farver_2.1.1           graphlayouts_1.0.0    
##  [10] bit64_4.0.5            scatterpie_0.2.1       fansi_1.0.4           
##  [13] xml2_1.3.4             codetools_0.2-19       splines_4.2.1         
##  [16] cachem_1.0.8           GOSemSim_2.22.0        knitr_1.43            
##  [19] polyclip_1.10-4        jsonlite_1.8.7         GO.db_3.15.0          
##  [22] png_0.1-8              ggforce_0.4.1          compiler_4.2.1        
##  [25] httr_1.4.7             lazyeval_0.2.2         Matrix_1.5-4.1        
##  [28] fastmap_1.1.1          cli_3.6.1              tweenr_2.0.2          
##  [31] htmltools_0.5.6        tools_4.2.1            igraph_1.5.0          
##  [34] gtable_0.3.4           glue_1.6.2             GenomeInfoDbData_1.2.8
##  [37] reshape2_1.4.4         DO.db_2.9              fastmatch_1.1-4       
##  [40] Rcpp_1.0.11            cellranger_1.1.0       jquerylib_0.1.4       
##  [43] vctrs_0.6.3            Biostrings_2.64.1      babelgene_22.9        
##  [46] nlme_3.1-162           ape_5.7-1              svglite_2.1.1         
##  [49] ggraph_2.1.0           xfun_0.39              rvest_1.0.3           
##  [52] timechange_0.2.0       lifecycle_1.0.3        DOSE_3.22.1           
##  [55] zlibbioc_1.42.0        MASS_7.3-58.3          scales_1.2.1          
##  [58] tidygraph_1.2.3        hms_1.1.3              parallel_4.2.1        
##  [61] RColorBrewer_1.1-3     yaml_2.3.7             memoise_2.0.1         
##  [64] gridExtra_2.3          downloader_0.4         ggfun_0.1.0           
##  [67] yulab.utils_0.0.6      sass_0.4.7             stringi_1.7.12        
##  [70] RSQLite_2.3.1          highr_0.10             tidytree_0.4.2        
##  [73] BiocParallel_1.30.4    GenomeInfoDb_1.32.4    rlang_1.1.1           
##  [76] pkgconfig_2.0.3        systemfonts_1.0.4      bitops_1.0-7          
##  [79] evaluate_0.21          lattice_0.21-8         labeling_0.4.3        
##  [82] treeio_1.20.2          patchwork_1.1.3        shadowtext_0.1.2      
##  [85] bit_4.0.5              tidyselect_1.2.0       plyr_1.8.8            
##  [88] magrittr_2.0.3         R6_2.5.1               generics_0.1.3        
##  [91] DBI_1.1.3              pillar_1.9.0           withr_2.5.0           
##  [94] KEGGREST_1.36.3        RCurl_1.98-1.12        crayon_1.5.2          
##  [97] utf8_1.2.3             tzdb_0.4.0             rmarkdown_2.24        
## [100] viridis_0.6.3          grid_4.2.1             data.table_1.14.8     
## [103] blob_1.2.4             digest_0.6.32          webshot_0.5.4         
## [106] gridGraphics_0.5-1     munsell_0.5.0          viridisLite_0.4.2     
## [109] ggplotify_0.1.0        bslib_0.5.1